################################################################################
###
### Filename: 	Interpolation-Cotton.R
### Author: 	  David A. Bateman
### Function: 	Most of this code is based on interpolation.R, 
###             submitted by Acharya, Blackwell, and Sen to the Journal of 
###             Politics dataverse to reproduce figures and tables in Acharya, 
###             Blackwell, and Sen (2016). 
###             This file interpolates cotton suitability backwards into the 
###             county boundaries of 1830. Needed for online appendix material
###             in Deeper Roots.
###	Last update: January 1, 2023
###
################################################################################

rm(list = ls())


library(raster)
library(rgdal)
library(rgeos)
library(plyr)
library(reshape)
library(ineq)
library(maps)
library(maptools)
library(foreign)
library(RColorBrewer)
library(classInt)
data(state.fips)
state.fips <- unique(state.fips[,c("fips","abb")])
state.fips$abb <- as.character(state.fips$abb)
state.fips <- rbind(state.fips, c(2, "AK"))
state.fips <- rbind(state.fips, c(15, "HI"))
rownames(state.fips) <- state.fips$abb
fips.state <- state.fips
rownames(fips.state) <- fips.state$fips
data(county.fips)
setwd("../Deeper Roots")


## Historical county data available from http://publications.newberry.org/ahcbp/
histcounties <- readOGR("Dataverse/Shapefiles/US_AtlasHCB_Counties_Gen001/US_HistCounties_Gen001_Shapefile", layer = "US_HistCounties_Gen001")
histcounties$fips <- as.numeric(as.character(histcounties$FIPS))

histstates <- readOGR("Dataverse/Shapefiles/US_AtlasHCB_StateTerr_Gen0001/US_HistStateTerr_Gen0001_Shapefile", layer = "US_HistStateTerr_Gen0001")


## These data sources are originally from Jeremy Atack. The links and code are 
## from interpolation.R (Acharya, Blackwell, and Sen (2016)).
## https://my.vanderbilt.edu/jeremyatack/data-downloads/
rail <- readOGR("Dataverse/Shapefiles/RR1826-1911Modified0509161", "RR1826-1911Modified050916")
rail <- rail[rail$InOpBy < 1860,]
rivers <- readOGR("Dataverse/Shapefiles/SteamboatNavigatedRiverContigUSAprojection", layer = "SteamboatNavigatedRiverContigUSAprojection")
rivers <- rivers[rivers$START < 1860 & rivers$END > 1870,]
canals <- readOGR("Dataverse/Shapefiles/19thC_Canals_March2017", layer = "19thC_Canals_March2017")
canals <- canals[which(canals$OPENED < 1860 & canals$CLOSED > 1860),]
## coastal/shoreline data comes from the NOAA
## https://coast.noaa.gov/htdata/SocioEconomic/NOAA_CoastalCountyDefinitions.pdf
##
coastal <- read.csv("Dataverse/Shapefiles/Coastal/coastal.csv")


histcounties <- spTransform(histcounties, CRS(proj4string(rivers)))
histstates <- spTransform(histstates, CRS(proj4string(rivers)))
canals <- spTransform(canals, CRS(proj4string(rivers)))
rail <- spTransform(rail, CRS(proj4string(rivers)))

histcounties$fips[which(histcounties$NAME == "CARROLL (ext)")] <- 22035

cc1860 <- which(histcounties$START_N <= 18590101 & histcounties$END_N >= 18590101)
counties1860 <- histcounties[cc1860,]


## Fix South Carolina for 1860
kk <- as.character(counties1860$NAME[counties1860$STATE_TERR == "South Carolina"])
kk <- sub(" Dist.", "", kk)
kk <- paste("south carolina,",tolower(kk), sep = "")
kk.fips <- NA
kk.fips[kk %in% county.fips$polyname] <- unlist(lapply(kk, function(x) county.fips$fips[county.fips$polyname == x]))
counties1860$fips[counties1860$STATE_TERR == "South Carolina"] <- kk.fips
counties1860 <- counties1860[!is.na(counties1860$fips),]
newpolys <- unionSpatialPolygons(counties1860, counties1860$fips)
kk <- unique(as.data.frame(counties1860)[,c("fips", "STATE_TERR")])
rownames(kk) <- as.character(kk$fips)
counties1860 <- SpatialPolygonsDataFrame(newpolys, data = kk)
counties1860$coarea1860.km2 <- gArea(counties1860, byid = TRUE)/(1000^2)


cc2000 <- which(histcounties$START_N <= 20001231 & histcounties$END_N >= 20001231)
counties2000 <- histcounties[cc2000,]
counties2000$fips <- as.numeric(as.character(counties2000$FIPS))
counties2000 <- counties2000[!is.na(counties2000$fips),]
counties2000$coarea2000.km2 <- gArea(counties2000, byid = TRUE)/(1000^2)
#counties2000 <- counties2000[!(counties2000$STATE_TERR %in% c("Alaska", "Hawaii")),]
counties2000$coastal <- 1 * (counties2000$fips %in% coastal$fips)
counties2000$river <- 1 * (colSums(gCrosses(counties2000, rivers, byid = TRUE)) > 0)
counties2000$canal <- 1 * (colSums(gCrosses(counties2000, canals, byid = TRUE)) > 0)
counties2000$water1860 <- 1 * (counties2000$coastal + counties2000$river + counties2000$canal > 0)
counties2000$rail1860 <- 1 * (colSums(gCrosses(counties2000, rail, byid = TRUE)) > 0)

ss2000 <- which(histstates$START_N <= 20001231 & histstates$END_N >= 20001231)
states2000 <- histstates[ss2000,]
states2000 <- states2000[!(states2000$ABBR_NAME %in% c("AK", "HI")),]
southern.states <- c("Alabama", "Arkansas", "Georgia", "Kentucky", "Louisiana",
                     "Mississippi", "Missouri", "North Carolina", "South Carolina", "Tennessee", 
                     "Virginia", "West Virginia", "Maryland", "Delaware")

south <- states2000$ABBR_NAME %in% c("AL", "AR", "MS", "LA","MO","KY","WV","VA","MD","DE","NC","SC","GA","TN","DC")
states2000$south <- south
the.south <- unionSpatialPolygons(states2000, south)
the.south <- spTransform(the.south, CRS(proj4string(rivers)))


## Get 1830-1850 censuses and fix South Carolina for those years as well
## Data comes from ICPSR 35206
## http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/35206
##1830
cc1830 <- which(histcounties$START_N <= 18300101 & histcounties$END_N >= 18300101)
counties1830 <- histcounties[cc1830,]
kk <- as.character(counties1830$NAME[counties1830$STATE_TERR == "South Carolina"])
kk <- sub(" Dist.", "", kk)
kk <- paste("south carolina,",tolower(kk), sep = "")
kk.fips <- NA
kk.fips[kk %in% county.fips$polyname] <- unlist(lapply(kk, function(x) county.fips$fips[county.fips$polyname == x]))
counties1830$fips[counties1830$STATE_TERR == "South Carolina"] <- kk.fips
counties1830 <- counties1830[!is.na(counties1830$fips),]
newpolys <- unionSpatialPolygons(counties1830, counties1830$fips)
kk <- unique(as.data.frame(counties1830)[,c("fips", "STATE_TERR")])
rownames(kk) <- as.character(kk$fips)
counties1830 <- SpatialPolygonsDataFrame(newpolys, data = kk)
counties1830$coarea1830.km2 <- gArea(counties1830, byid = TRUE)/(1000^2)

## 1830 Census
## Data comes from ICPSR 35206
## http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/35206
census1830 <- read.dta("Dataverse/Census/census1830.dta")
vartable <- cbind(names(census1830), attr(census1830, "var.labels"))
cvars <- c("totpop", "stot", "fips", "fctot")
census1830 <- census1830[census1830$level == 1, ]
census1830 <- census1830[, cvars]
census1830 <- census1830[!is.na(census1830$fips),]
census1830$fips <- floor(census1830$fips)
census1830 <- cast(melt(census1830, id = "fips"), fips ~ variable, sum)
census1830$sprop <- census1830$stot/census1830$totpop
census1830$fbprop <- with(census1830, fctot/totpop)
agcensus1840 <- read.dta("Dataverse/Census/agcensus1840.dta")
vartable <- cbind(names(agcensus1840), attr(agcensus1840, "var.labels"))
cvars <- c("FIPS", "TOTAGOUT")
agcensus1840 <- agcensus1840[agcensus1840$LEVEL == 1, ]
agcensus1840 <- agcensus1840[, cvars]
names(agcensus1840)[1] <- "fips"
names(agcensus1840)[2] <- "totagout"
census1830 <- merge(census1830, agcensus1840, by = "fips", all.x=TRUE)
census1830$fvalpc <- with(census1830, totagout/totpop)
names(census1830)[-1] <- paste(names(census1830)[-1], "1830", sep = "")
census1830 <- census1830[which(census1830$fips %in% census1830$fips),]

census2000 <- read.csv("Dataverse/ABS-Files/census2000-cotton.csv", stringsAsFactors = FALSE)
vartable <- cbind(names(census2000), attr(census2000, "var.labels"))
cvars <- c("cottonsuit", "rugged", "fips", "coarea", "coarea00", "longitude", "latitude")
census2000 <- census2000[census2000$state.abb != "DC", ]
census2000 <- census2000[, cvars]
census2000 <- census2000[!is.na(census2000$fips),]
census2000$fips <- floor(census2000$fips)
census2000 <- cast(melt(census2000, id = "fips"), fips ~ variable, sum)


### Reverse interpolation. Goal is to take cotton suitability as measured in 
### 2000 counties and interpolate it back to 1830 counties
inters_reverse <- over(counties2000, SpatialPolygons(counties1830@polygons,proj4string=counties1830@proj4string), returnList = TRUE)
A.2000 <- matrix(0, nrow = nrow(counties2000), ncol = nrow(counties1830))
for (i in 1:nrow(counties2000)) {
  totarea <- gArea(counties2000[i,])
  for (j in inters_reverse[[i]]) {
    this.int <- gIntersection(counties2000[i,], counties1830[j,])
    if (!is.null(this.int)) A.2000[i,j] <- gArea(this.int)/totarea
  }
}
rownames(A.2000) <- as.character(counties2000$fips)
A.2000[A.2000 < 0.05]  <- 0
A.2000 <- A.2000/rowSums(A.2000)
A0.2000 <- A.2000[as.factor(census2000$fips),]

counties1830$cottonsuit <-NA

posvals <- which(colSums(A0.2000, na.rm=TRUE) > 0)
for (i in posvals) {
  weights2000 <- A0.2000[which(A0.2000[,i] > 0),i]
  these.counties <- census2000[census2000$fips %in% as.numeric(names(which(A0.2000[,i] > 0))),]
  ## NEED THE WEIGHTED AVERAGE
  counties1830$cottonsuit[i] <- (sum(weights2000 * these.counties$cottonsuit, na.rm = TRUE)/sum(weights2000))
}

summary(counties1830[, c("cottonsuit")])
counties1830.df <- as(counties1830, "data.frame")
output <- merge(counties1830.df,census1830, by="fips", all=TRUE)
write.csv(output,"Dataverse/Files/Cotton-Suitability-1830.csv", row.names = FALSE)

#####################################################################
## Make map of cotton suitability in 2000 and 1830
countydata <- read.csv("Dataverse/ABS-Files/abs-jop-countydata-ext-1830.csv", stringsAsFactors = FALSE)
xcounties <- readOGR("Dataverse/Shapefiles/US_AtlasHCB_Counties_Gen001/US_HistCounties_Gen001_Shapefile", layer = "US_HistCounties_Gen001")
xcounties$fips <- as.numeric(as.character(xcounties$FIPS))
xcounties <- spTransform(histcounties, CRS(proj4string(rivers)))

the.south <- spTransform(the.south, CRS(proj4string(rivers)))
xcounties$fips[which(xcounties$NAME == "CARROLL (ext)")] <- 22035

counties1830@data$order <- 1:nrow(counties1830@data)
counties1830@data <- merge(counties1830@data,census1830, by=c("fips"), all.x=TRUE, all.y=FALSE, sort=FALSE)
counties1830@data<-counties1830@data[order(counties1830@data$order),]

### This version of maps from ABS (2016) replication files
cotton.bins <- cut(countydata$cottonsuit, breaks = quantile(countydata$cottonsuit, c(0, 0.4, 0.5, 0.6, 0.7, 0.8, 1), na.rm=TRUE))
mcols <- brewer.pal(6, "Greens")
names(mcols) <- levels(cotton.bins)

tmp.data <- data.frame(fips = countydata$fips, cotton.cols = as.character(mcols[cotton.bins]))
mp <- map("county", plot=FALSE,namesonly=TRUE)
tmp.data <- merge(county.fips, tmp.data, by = "fips", all.x = TRUE)
rownames(tmp.data) <- tmp.data$polyname

map("county", fill = TRUE, col = as.character(tmp.data[mp, "cotton.cols"]),resolution = 0,lty = 0,projection = "polyconic")
map("state",col = "black",fill=FALSE,add=TRUE,lty=1,lwd=1,projection="polyconic")
legend("bottomleft", legend = c("Least Suitable", NA, NA, NA, NA, "Most Suitable"), fill = mcols, bty = "o", border = NA, y.intersp = 0.5, bg = grey(0.8), box.lwd = 0)

### Do it slightly different for 1830 counties. Make a heat map function.
plot.heat1 <- function(xcounties,z,title=NULL,breaks=NULL,reverse=FALSE,cex.legend=1,bw=.2,col.vec=NULL,plot.legend=TRUE) {
  ##Break down the value variable
  if (is.null(breaks)) {
    breaks=
      seq(
        floor(min(xcounties@data[,z],na.rm=TRUE)*10)/10
        ,
        ceiling(max(xcounties@data[,z],na.rm=TRUE)*10)/10
        ,.1)
  }
  xcounties@data$zCat <- cut(xcounties@data[,z],breaks,include.lowest=TRUE)
  cutpoints <- levels(xcounties@data$zCat)
  if (is.null(col.vec)) col.vec <- brewer.pal(6, "Greens")
  col.vec <- append(col.vec, "#FFFFFF", 0)
  if (reverse) {
    cutpointsColors <- rev(col.vec)
  } else {
    cutpointsColors <- col.vec
  }
  levels(xcounties@data$zCat) <- cutpointsColors
  plot(xcounties, border=NA, lwd=1,axes = FALSE, las = 1,col=as.character(xcounties@data$zCat), add=TRUE)
  
  if (plot.legend) legend("bottomleft", cutpoints, fill = cutpointsColors,bty="n",title=title,cex=cex.legend)
}


#####
plot(the.south[2,], density=c(10))
plot.heat1(counties1830[counties1830$STATE_TERR %in% southern.states,],z="cottonsuit",breaks=c(0, 0.01, 0.422, 0.46328144, 0.51276222, 0.5555, 0.60681822, .93), plot.legend=FALSE)

plot(the.south[2,], add=TRUE)
map("state",col = "black",fill=FALSE,add=TRUE,lty=1,lwd=1,projection="polyconic")


